import numpy.random as rnd
from sklearn import manifold

from ..common.utils import *
from ..common.data_plotter import *
from .casas import Casas, maybe_download_casas
from .word2vec import Word2vec
from .word2vec_custom import CustomWord2vec

# Sets TF_CPP_MIN_LOG_LEVEL to '2' to silence the (presumably TensorFlow)
# start-up warning about CPU instruction sets. This only hides the message;
# it does not enable AVX/FMA.
# NOTE(review): the word2vec modules are imported above this line -- if they
# load TensorFlow at import time, confirm the setting still takes effect here.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

"""
Simple word2vec model for activity labels. We will try to find an embedding
of the sensors in an apartment. This embedding will be derived from the
sequential sensor triggerings as the occupant of the apartment moves or
conducts daily activities. The goal is to show that relative positions of 
sensors in the embedding space reflect their relative positions in the real-world.

To compare the embedding and the real layout, check the pdf output generated by
this program and compare with ../datasets/CASAS/floor_plans/HH101-sensormap.png

This is a simple application of word2vec on a small dataset and should be good 
for educational purposes.

The activity dataset is obtained from: http://casas.wsu.edu/datasets/hh101.zip
This dataset has sensor readings for a single-occupant home. Multiple sensors are
placed at various locations in an apartment and they turn ON/OFF based on the
occupant's movements (we only use the sensors whose ids start with 'M' or 'MA').

The related floor plan showing the sensor layout can be found at:
    http://ailab.wsu.edu/casas/hh/hh101/profile/page-6.html

More details on the setup can be found in:
    D. Cook, A. Crandall, B. Thomas, and N. Krishnan.
    CASAS: A smart home in a box. IEEE Computer, 46(7):62-69, 2013.
    
    and on the CASAS website: http://ailab.wsu.edu/casas/hh

To execute:
pythonw -m ad_examples.timeseries.activity_word2vec --log_file=temp/timeseries/activity_word2vec.log --n_epochs=1 --debug
"""


if __name__ == "__main__":
    # Script entry point: load the CASAS HH101 sensor sequence, train a
    # word2vec model on it, optionally project the embeddings to 2-D with
    # t-SNE, and write a labelled scatter plot of the sensors to a PDF.

    import inspect  # local import: only needed for the t-SNE compatibility check

    logger = logging.getLogger(__name__)

    # Single working directory for the log file, the downloaded data and plots.
    localpath = "./temp/timeseries"
    dir_create(localpath)

    args = get_command_args(debug=False, debug_args=["--n_lags=20",
                                                     "--n_epochs=1",
                                                     "--debug",
                                                     "--plot",
                                                     "--log_file=temp/timeseries/activity_word2vec.log"])
    configure_logger(args)

    # Seed `random` (brought in by the wildcard imports) and numpy.random so
    # that repeated runs start from the same state.
    random.seed(42)
    rnd.seed(42)

    url = "http://casas.wsu.edu/datasets/"
    casasfile = maybe_download_casas("hh101.zip", url, "hh101/ann.txt", localpath)
    logger.debug("casasfile: %s", casasfile)
    cas = Casas(dataset_path=casasfile)

    logger.debug("cas.sensor2code:\n%s", cas.sensor2code)
    logger.debug("cas.code2sensor:\n%s", cas.code2sensor)
    logger.debug("cas.sensor_seq:\n%s", cas.sensor_seq[0:10])
    logger.debug("cas.sensor_enc:\n%s", cas.sensor_enc[0:10])

    timer = Timer()

    # mode selects the implementation: 1 -> CustomWord2vec, 2 -> Word2vec.
    # Any other value trains nothing and therefore produces no plot.
    modes = {0: "", 1: "custom", 2: "original"}

    mode = 2

    dims = 15
    window_size = 3
    n_epochs = args.n_epochs
    neg_samples = 3
    normalize_embeddings = False
    # Embeddings with more than two dimensions are reduced with t-SNE before
    # plotting; 2-D embeddings are plotted as they are.
    use_tsne = dims > 2

    w2v = None
    embeddings = None

    # The signature becomes part of the output PDF name so that runs with
    # different settings do not overwrite each other.
    signature = "d%d%s%s%s" % (dims, "_norm" if normalize_embeddings else "",
                               "_" + modes[mode], "_tsne" if use_tsne else "")

    logger.debug("signature: %s", signature)

    if mode == 1:
        w2v = CustomWord2vec(sensors=cas.sensors,
                             code2sensor=cas.code2sensor, sensor2code=cas.sensor2code,
                             dims=dims, window_size=window_size, neg_samples=neg_samples,
                             n_epochs=n_epochs, debug=args.debug)
    elif mode == 2:
        w2v = Word2vec(sensors=cas.sensors,
                       code2sensor=cas.code2sensor, sensor2code=cas.sensor2code,
                       dims=dims, window_size=window_size, n_epochs=n_epochs, num_skips=2,
                       num_sampled=neg_samples, num_steps=100001, debug=args.debug)

    if w2v is not None:
        # Shared train/report/extract path so that both implementations log
        # their training time (previously only the custom model did).
        w2v.fit(cas.sensor_enc)
        logger.debug(timer.message("Completed training in"))
        embeddings = w2v.get_embeddings(normalized=normalize_embeddings)

    if embeddings is not None:
        logger.debug("%s", embeddings.shape)

        if not use_tsne:
            x_tr = embeddings
        else:
            logger.debug("computing t-SNE for embedded space...")
            # scikit-learn renamed TSNE's `n_iter` argument to `max_iter`
            # (deprecated in 1.5, later removed); pick whichever name the
            # installed version accepts.
            tsne_params = inspect.signature(manifold.TSNE.__init__).parameters
            iter_kwarg = "max_iter" if "max_iter" in tsne_params else "n_iter"
            # perplexity=30, as used in original word2vec, does not result
            # in good visualization...
            tsne_embed = manifold.TSNE(perplexity=3,
                                       n_components=2, init='pca',
                                       random_state=0, method='exact',
                                       **{iter_kwarg: 5000})
            x_tr = tsne_embed.fit_transform(embeddings)

        # The real layout to compare against is the floor plan image
        # ../datasets/CASAS/floor_plans/HH101-sensormap.png (not drawn here).
        pdfpath = "temp/timeseries/activity_sensors_%s.pdf" % signature
        dp = DataPlotter(pdfpath=pdfpath, rows=1, cols=1)
        try:
            pl = dp.get_next_plot()
            dp.plot_points(x_tr, pl, marker='+', s=20)
            # Row `code` of x_tr is used as the position of the sensor with
            # that code; label each point with the sensor name.
            for code, sensor in cas.code2sensor.items():
                pl.text(x_tr[code, 0], x_tr[code, 1], sensor)
        finally:
            # Always close the plotter, even if plotting fails part-way.
            dp.close()
